Compiled under R version 3.6.3 (2020-02-29)

WARNING: edit the working directory to your preferred folder.

This document details all analyses performed in R for the study:
Legendre, L. J., D. Rubilar-Rogers, G. M. Musser, S. N. Davis, R. A. Otero, A. O. Vargas, and J. A. Clarke. 2020. A giant soft-shelled egg from the Late Cretaceous of Antarctica. Nature.

For more information regarding the study, datasets, and analyses, please refer to the Supplementary Information of this paper. If you have any additional questions, feel free to email me at .

Loading packages

library(AICcmodavg)
library(ape)
library(caper)
library(dplyr)
library(evobiR)
library(ggplot2)
library(MPSEM)
library(nlme)
library(nortest)
library(phylopath)
library(phytools)
library(RColorBrewer)

1 – Lepidosaur dataset

This part requires the use of dataset 1 (“Dataset1-lepidosaurs.txt”; see also Supplementary Table 1 in Legendre et al., 2020), and lepidosaur phylogenetic tree (“Lepidosaurtree.trees.nex”).

  • Loading the tree:
tree<-read.nexus("Lepidosaurtree.trees.nex")

Phylogenetic generalized least squares (PGLS)

Egg volume ~ SVL (snout-vent length)

  • Loading the dataset and removing species with missing data
data2<-read.table("Dataset1-lepidosaurs.txt", header=T); data2V<-subset(data2, !is.na(V)&!is.na(SVL))
pruned.tree<-drop.tip(tree, setdiff(tree$tip.label, data2V$Species))
data<-read.table("Dataset1-lepidosaurs.txt", header=T, row.names = "Species"); dataV<-subset(data, !is.na(V)&!is.na(SVL))
  • Best fit for alpha parameter in OU (Ornstein-Uhlenbeck) model
alpha <- seq(0, 1, 0.1)
fit <- list()
form <- log(V)~log(SVL)
for (i in seq_along(alpha)) {
  cor <- corMartins(alpha[i], phy = pruned.tree, fixed = T)
  fit[[i]] <- gls(form, correlation = cor, data = data, na.action=na.exclude, method = "ML")
}
sapply(fit, logLik)
##  [1]       Inf -319.2756 -323.6809 -324.3353 -324.6167 -324.8171 -324.9815
##  [8] -325.1226 -325.2464 -325.3564 -325.4550
  • Building PGLS models and running model selection using AICc
BM<-gls(log(V)~log(SVL), data=dataV, correlation=corBrownian(phy=pruned.tree), method="ML")
OU<-gls(log(V)~log(SVL), data=dataV, correlation=corMartins(1, phy=pruned.tree, fixed=T), method="ML")
Lambda<-gls(log(V)~log(SVL), data=dataV, correlation=corPagel(1, phy=pruned.tree), method="ML")
EB<-gls(log(V)~log(SVL), data=dataV, correlation=corBlomberg(0.1, phy=pruned.tree), method="ML")
OLS<-gls(log(V)~log(SVL), data=dataV, method="ML")

Cand.models = list()
Cand.models[[1]] = BM
Cand.models[[2]] = OU
Cand.models[[3]] = Lambda
Cand.models[[4]] = EB
Cand.models[[5]] = OLS

Modnames = paste(c("BM", "OU", "Lambda", "EB", "OLS"), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = T)
summary(Lambda)
## Generalized least squares fit by maximum likelihood
##   Model: log(V) ~ log(SVL) 
##   Data: dataV 
##        AIC      BIC    logLik
##   485.0497 498.9889 -238.5248
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.8802218 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept) -0.7990941 0.7306413 -1.093689  0.2752
## log(SVL)     1.6708400 0.0941323 17.749917  0.0000
## 
##  Correlation: 
##          (Intr)
## log(SVL) -0.63 
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.11666418 -0.80674408 -0.01263292  0.65520753  1.74914328 
## 
## Residual standard error: 1.168844 
## Degrees of freedom: 241 total; 239 residual
  • Testing for normality
plot(Lambda, resid(., type="n")~fitted(.), main="Normalized Residuals v Fitted Values",abline=c(0,0))

res<-resid(Lambda, type="n")
par(mar=c(5.1,4.1,4.1,2.1))
qqnorm(res)
qqline(res)

lillie.test(residuals(Lambda))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals(Lambda)
## D = 0.07408, p-value = 0.002707
lillie.test(chol(solve(vcv(pruned.tree)))%*%residuals(Lambda))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  chol(solve(vcv(pruned.tree))) %*% residuals(Lambda)
## D = 0.15828, p-value = 4.371e-16
  • Removing outliers and building a new model without them
which(res>4); which(res<(-2.5))
## Cryptoblepharus_poecilopleurus 
##                            168
## Chirindia_ewerbecki Micrurus_mipartitus 
##                  89                 222
newdata<-dataV[-c(89,168,222),]
newdata2<-data2V[-c(89,168,222),]
newtree<-drop.tip(pruned.tree, setdiff(pruned.tree$tip.label, newdata2$Species))

Newlambda<-gls(log(V)~log(SVL), data=newdata, correlation=corPagel(1, phy=newtree), method="ML")
summary(Newlambda)
## Generalized least squares fit by maximum likelihood
##   Model: log(V) ~ log(SVL) 
##   Data: newdata 
##       AIC      BIC    logLik
##   428.773 442.6621 -210.3865
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.9221827 
## 
## Coefficients:
##                  Value Std.Error t-value p-value
## (Intercept) -0.8303776 0.7046654 -1.1784  0.2398
## log(SVL)     1.6870012 0.0865058 19.5016  0.0000
## 
##  Correlation: 
##          (Intr)
## log(SVL) -0.6  
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.24045710 -0.84905671 -0.03920481  0.62244401  1.55314354 
## 
## Residual standard error: 1.1406 
## Degrees of freedom: 238 total; 236 residual
plot(Newlambda, resid(., type="n")~fitted(.), main="Normalized Residuals v Fitted Values",abline=c(0,0))

res<-resid(Newlambda, type="n")
par(mar=c(5.1,4.1,4.1,2.1))
qqnorm(res)
qqline(res)

lillie.test(residuals(Newlambda))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals(Newlambda)
## D = 0.074716, p-value = 0.002609
lillie.test(chol(solve(vcv(newtree)))%*%residuals(Newlambda))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  chol(solve(vcv(newtree))) %*% residuals(Newlambda)
## D = 0.13226, p-value = 7.181e-11
  • Estimating the pseudo R-squared using caper
#newlambda:
datacomp<-comparative.data(phy=newtree, data=newdata2, names.col="Species")
pgls<-pgls(log(V)~log(SVL), data=datacomp, lambda="ML")
summary(pgls)
## 
## Call:
## pgls(formula = log(V) ~ log(SVL), data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.107910 -0.039203 -0.005097  0.034535  0.147102 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.718
##    lower bound : 0.000, p = 1.8499e-05
##    upper bound : 1.000, p = 4.5223e-05
##    95.0% CI   : (0.413, 0.908)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.41450    0.91690 -0.4521    0.6528    
## log(SVL)     1.63215    0.15212 10.7293 6.661e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06154 on 64 degrees of freedom
## Multiple R-squared: 0.6427,  Adjusted R-squared: 0.6371 
## F-statistic: 115.1 on 1 and 64 DF,  p-value: 6.121e-16
#lambda:
datacompl<-comparative.data(phy=pruned.tree, data=data2V, names.col="Species")
pgls<-pgls(log(V)~log(SVL), data=datacompl, lambda="ML")
summary(pgls)
## 
## Call:
## pgls(formula = log(V) ~ log(SVL), data = datacompl, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.140589 -0.048249 -0.004924  0.031043  0.155446 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.715
##    lower bound : 0.000, p = 7.7636e-05
##    upper bound : 1.000, p = 0.00015352
##    95.0% CI   : (0.381, 0.918)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  0.19647    0.96534  0.2035    0.8393    
## log(SVL)     1.53068    0.16093  9.5115 5.418e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06683 on 66 degrees of freedom
## Multiple R-squared: 0.5782,  Adjusted R-squared: 0.5718 
## F-statistic: 90.47 on 1 and 66 DF,  p-value: 5.417e-14

Not much difference between the two models, assumptions of normality are not met in either of them – we use lambda instead of newlambda for the plot.

  • Plot of the regression
ggplot(dataV, aes(log(SVL), log(V), color=Suborder)) +
geom_point(size=4) +
  xlab("ln snout-vent length (mm)") +
  ylab("ln egg volume (mm3)") +
  geom_abline(intercept=-0.7990941, slope=1.6708400, colour="lightblue", size=1.3) +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
        scale_colour_brewer("Suborder", palette="Accent")

  • Estimating the SVL of Antarcticoolithus using the allometric equation of PGLS, including it in the dataset, then in the plot
exp((-0.7990941/1.6708400)+(1/1.6708400)*log(5471405.789))
## [1] 6684.303
data[1,7]<-6684.303
dataV2<-subset(data, !is.na(V))

ggplot(dataV2, aes(log(SVL), log(V), color=Suborder)) +
  geom_point(size=4) +
  xlab("ln snout-vent length (mm)") +
  ylab("ln egg volume (mm3)") +
  geom_abline(intercept=-0.7990941, slope=1.6708400, colour="lightblue") +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Suborder", palette="Accent")

Eggshell thickness ~ body mass is not detailed here, since the code is virtually identical – only variable names and regression coefficients for the plot and estimation are different.

  • Estimating the body mass of Antarcticoolithus using the allometric equation of PGLS, including it in the dataset, then in the plot
LambdaBM<-gls(log(V)~log(BM), data=dataV, correlation=corPagel(1, phy=pruned.tree), method="ML")
summary(LambdaBM)
## Generalized least squares fit by maximum likelihood
##   Model: log(V) ~ log(BM) 
##   Data: dataV 
##        AIC      BIC    logLik
##   466.1175 480.0567 -229.0587
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.7332279 
## 
## Coefficients:
##                Value Std.Error  t-value p-value
## (Intercept) 5.050556 0.4341909 11.63211       0
## log(BM)     0.619536 0.0300611 20.60926       0
## 
##  Correlation: 
##         (Intr)
## log(BM) -0.254
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.1068229 -0.3986971  0.2864704  0.8395741  2.8852589 
## 
## Residual standard error: 0.9261633 
## Degrees of freedom: 241 total; 239 residual
exp(-(5.050557/0.619536)+(1/0.619536)*log(5471405.789))
## [1] 21657214
data[1,8]<-21657214
dataV2<-subset(data, !is.na(V))

ggplot(dataV2, aes(log(BM), log(V), color=Suborder)) +
  geom_point(size=4) +
  xlab("ln body mass (g)") +
  ylab("ln egg volume (mm3)") +
  geom_abline(intercept=5.050556, slope=0.619536, colour="lightblue") +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Suborder", palette="Set1")

Eggshell thickness ~ Egg volume

  • Removing species with missing data
dataVT2<-subset(data2, !is.na(Thickness)&!is.na(V)); dataVT2<-dataVT2[-1,]
pruned.treeVT<-drop.tip(tree, setdiff(tree$tip.label, dataVT2$Species))
dataVT<-subset(data, !is.na(Thickness)&!is.na(V)); dataVT<-dataVT[-1,]
  • Building PGLS models and running model selection using AICc
BMVT<-gls(log(Thickness)~log(V), data=dataVT, correlation=corBrownian(phy=pruned.treeVT), method="ML")
OUVT<-gls(log(Thickness)~log(V), data=dataVT, correlation=corMartins(1, phy=pruned.treeVT), method="ML")
LambdaVT<-gls(log(Thickness)~log(V), data=dataVT, correlation=corPagel(1, phy=pruned.treeVT), method="ML")
EBVT<-gls(log(Thickness)~log(V), data=dataVT, correlation=corBlomberg(0.1, phy=pruned.treeVT, fixed=T), method="ML")
OLSVT<-gls(log(Thickness)~log(V), data=dataVT, method="ML")

Cand.models = list()
Cand.models[[1]] = BMVT
Cand.models[[2]] = OUVT
Cand.models[[3]] = LambdaVT
Cand.models[[4]] = EBVT
Cand.models[[5]] = OLSVT

Modnames = paste(c("BMVT", "OUVT", "LambdaVT", "EBVT", "OLSVT"), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = T)
summary(EBVT)
## Generalized least squares fit by maximum likelihood
##   Model: log(Thickness) ~ log(V) 
##   Data: dataVT 
##        AIC      BIC    logLik
##   134.5443 141.2028 -64.27216
## 
## Correlation Structure: corBlomberg
##  Formula: ~1 
##  Parameter estimate(s):
##   g 
## 0.1 
## 
## Coefficients:
##                 Value Std.Error  t-value p-value
## (Intercept) 2.0900026 0.4535976 4.607614       0
## log(V)      0.3270484 0.0574324 5.694490       0
## 
##  Correlation: 
##        (Intr)
## log(V) -0.972
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.9317947 -0.7662902 -0.2506952  0.4323433  3.0468614 
## 
## Residual standard error: 0.6624366 
## Degrees of freedom: 68 total; 66 residual
  • Testing for normality
lillie.test(residuals(EBVT))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals(EBVT)
## D = 0.073029, p-value = 0.4927
lillie.test(chol(solve(vcv(pruned.treeVT)))%*%residuals(EBVT))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  chol(solve(vcv(pruned.treeVT))) %*% residuals(EBVT)
## D = 0.12937, p-value = 0.006569
plot(EBVT, resid(., type="n")~fitted(.), main="Normalized Residuals v Fitted Values",abline=c(0,0))

res<-resid(EBVT, type="n")
par(mar=c(5.1,4.1,4.1,2.1))
qqnorm(res)
qqline(res)

  • Removing outliers and building a new model without them
which(res<(-2)); which(res>2.5)
## Zootoca_vivipara_viviparous 
##                          45
## Chamaeleo_senegalensis 
##                     19
newdataVT<-dataVT[-c(19,45),]
newdataVT2<-dataVT2[-c(19,45),]
newtreeVT<-drop.tip(pruned.treeVT, c("Zootoca_vivipara_viviparous","Chamaeleo_senegalensis"))

NewEBVT<-gls(log(Thickness)~log(V), data=newdataVT, correlation=corBlomberg(0.1, phy=newtreeVT, fixed=T), method="ML")
summary(NewEBVT)
## Generalized least squares fit by maximum likelihood
##   Model: log(Thickness) ~ log(V) 
##   Data: newdataVT 
##        AIC      BIC    logLik
##   114.2644 120.8334 -54.13222
## 
## Correlation Structure: corBlomberg
##  Formula: ~1 
##  Parameter estimate(s):
##   g 
## 0.1 
## 
## Coefficients:
##                 Value Std.Error  t-value p-value
## (Intercept) 2.1246786 0.4052994 5.242244       0
## log(V)      0.3209485 0.0511337 6.276657       0
## 
##  Correlation: 
##        (Intr)
## log(V) -0.972
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.7567958 -0.8478322 -0.2694998  0.4934623  2.2910465 
## 
## Residual standard error: 0.5849121 
## Degrees of freedom: 66 total; 64 residual
plot(NewEBVT, resid(., type="n")~fitted(.), main="Normalized Residuals v Fitted Values",abline=c(0,0))

res<-resid(NewEBVT, type="n")
par(mar=c(5.1,4.1,4.1,2.1))
qqnorm(res)
qqline(res)

lillie.test(residuals(NewEBVT))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals(NewEBVT)
## D = 0.085226, p-value = 0.2753
lillie.test(chol(solve(vcv(newtreeVT)))%*%residuals(NewEBVT))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  chol(solve(vcv(newtreeVT))) %*% residuals(NewEBVT)
## D = 0.12101, p-value = 0.01773
  • Plot of the regression
NewEBVT$coefficients
## (Intercept)      log(V) 
##   2.1246786   0.3209485
ggplot(newdataVT, aes(log(V), log(Thickness), color=Suborder)) +
  geom_point(size=4) +
  xlab("ln egg volume (mm3)") +
  ylab("ln eggshell thickness (µm)") +
  geom_abline(intercept=2.1246786, slope=0.3209485, colour="lightblue") +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Suborder", palette="Accent")

Phylogenetic path analysis

  • Building the models
ppadata<-dataVT
ppadata[,c(4,6:8)]<-log(ppadata[,c(4,6:8)])
M<-define_model_set(null=c(),
                    one=c(Thickness~SVL),
                    two=c(V~BM),
                    three=c(Thickness~V),
                    four=c(Thickness~V+SVL),
                    five=c(Thickness~V, V~BM),
                    six=c(Thickness~SVL, V~BM),
                    seven=c(Thickness~SVL+BM),
                    eight=c(V~BM, Thickness~BM),
                    nine=c(Thickness~V+BM),
                    ten=c(Thickness~SVL+BM+V),
                    eleven=c(Thickness~V+BM, V~BM),
                    twelve=c(Thickness~SVL+BM, V~BM),
                    thirteen=c(Thickness~BM),
                    .common=c(BM~SVL, V~SVL))
  • Plot all models and compile best model using CICc
plot_model_set(M)
## Warning: The curvature argument has been deprecated in favour of strength

result<-phylo_path(M, data=ppadata, tree=pruned.treeVT, model='lambda'); result
## A phylogenetic path analysis, on the variables:
##  Continuous:  Thickness SVL V BM 
##  Binary:       
## 
##  Evaluated for these models: null one two three four five six seven eight nine ten eleven twelve thirteen 
## 
##  Containing 31 phylogenetic regressions, of which 10 unique
s<-summary(result); s
plot(s)
## Warning: Position guide is perpendicular to the intended axis. Did you mean to
## specify a different guide `position`?
## Warning: guide_axis(): Discarding guide on merge. Do you have more than one
## guide with the same position?

## Warning: guide_axis(): Discarding guide on merge. Do you have more than one
## guide with the same position?

## Warning: guide_axis(): Discarding guide on merge. Do you have more than one
## guide with the same position?

## Warning: guide_axis(): Discarding guide on merge. Do you have more than one
## guide with the same position?

averagemodel<-average(result)
## Registered S3 methods overwritten by 'MuMIn':
##   method         from   
##   nobs.pgls      caper  
##   nobs.phylolm   phylolm
##   logLik.phylolm phylolm
plot(averagemodel)
## Warning: The curvature argument has been deprecated in favour of strength

Predictions of SVL and body mass – using phylogenetic eigenvectors maps (PEM)

  • Data and tree
fossiltreePEM<-read.nexus("Lepidosaurtree.trees.nex")
fossildataPEM<-read.table("Dataset1-lepidosaurs.txt",header=TRUE,stringsAsFactor=FALSE,)
fossildataV<-subset(fossildataPEM, !is.na(V))
fossildataV[,c(3:5,7:9)]<-log(fossildataV[,c(3:5,7:9)])

For SVL

  • Match the phylogeny and dataset
treePEM<-drop.tip(fossiltreePEM, setdiff(fossiltreePEM$tip.label, fossildataV$Species))
treedrop <- drop.tip(treePEM,fossildataV[is.na(fossildataV[,"SVL"]),"Species"])
grloc <- getGraphLocations(treePEM,fossildataV[is.na(fossildataV[,"SVL"]),"Species"])
sporder <- match(attr(grloc$x,"vlabel")[grloc$x$vertex$species],fossildataV[,"Species"])
  • Build the PEM
PEMfs <- list()
PEMfs[["V"]] <- PEM.fitSimple(y=fossildataV[sporder,"SVL"],
                    x=fossildataV[sporder,"V"],w=grloc$x,d="distance",sp="species",
                    lower=0,upper=1)
PEMfs[["none"]] <- PEM.fitSimple(y=fossildataV[sporder,"SVL"],
                    x=NULL,w=grloc$x,d="distance",sp="species",lower=0,upper=1)
for(m in c("V","none")) print(PEMfs[[m]]$optim$par)
## [1] 0.4981507
## [1] 0.1197493
rm(m)
  • Select the best model (with V as a co-predictor vs without co-predictors) based on AICc
PEMAIC <- list()
PEMAIC[["V"]] <- lmforwardsequentialAICc(y=fossildataV[sporder,"SVL"],
                                         x=fossildataV[sporder,"V",drop=FALSE],object=PEMfs[["V"]])
PEMAIC[["none"]] <- lmforwardsequentialAICc(y=fossildataV[sporder,"SVL"],object=PEMfs[["none"]])
for(m in c("V","none"))
  cat(m,summary(PEMAIC[[m]])$adj,PEMAIC[[m]]$AICc,"\n")
## V 0.9945104 -248.9384 
## none 0.9913577 -124.1351
rm(m)

summary(PEMAIC[["V"]])
## 
## Call:
## lm(formula = as.formula(paste(p1, p2, sep = "")), data = df1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.178636 -0.035423  0.004214  0.039172  0.153809 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.071635   0.066712  31.053  < 2e-16 ***
## V            0.393493   0.009214  42.705  < 2e-16 ***
## V_1         -9.100852   0.108193 -84.116  < 2e-16 ***
## V_2         -4.704946   0.126827 -37.097  < 2e-16 ***
## V_30         1.372985   0.083670  16.410  < 2e-16 ***
## V_6          0.986086   0.110409   8.931 3.32e-15 ***
## V_10         0.858355   0.118694   7.232 3.56e-11 ***
## V_8         -0.984514   0.083734 -11.758  < 2e-16 ***
## V_63        -0.760769   0.084320  -9.022 1.98e-15 ***
## V_7          0.862697   0.083710  10.306  < 2e-16 ***
## V_31         0.874262   0.084017  10.406  < 2e-16 ***
## V_117        0.819127   0.084435   9.701  < 2e-16 ***
## V_27        -1.240447   0.106374 -11.661  < 2e-16 ***
## V_77        -0.733867   0.083703  -8.768 8.30e-15 ***
## V_229       -0.819382   0.086023  -9.525  < 2e-16 ***
## V_156        0.507389   0.085388   5.942 2.39e-08 ***
## V_124       -0.408268   0.089180  -4.578 1.08e-05 ***
## V_5          0.388503   0.087559   4.437 1.91e-05 ***
## V_180       -0.572372   0.083671  -6.841 2.72e-10 ***
## V_14        -0.450315   0.085040  -5.295 4.87e-07 ***
## V_225       -0.546724   0.083745  -6.528 1.33e-09 ***
## V_48        -0.617529   0.083862  -7.364 1.77e-11 ***
## V_11         0.632745   0.084051   7.528 7.39e-12 ***
## V_108        0.675610   0.084757   7.971 6.76e-13 ***
## V_76         0.648948   0.084352   7.693 3.05e-12 ***
## V_44        -0.384803   0.085978  -4.476 1.64e-05 ***
## V_17         0.485665   0.083976   5.783 5.10e-08 ***
## V_80         0.535035   0.083675   6.394 2.61e-09 ***
## V_205        0.551436   0.083744   6.585 1.00e-09 ***
## V_23        -0.401879   0.084055  -4.781 4.61e-06 ***
## V_59        -0.320047   0.085352  -3.750 0.000265 ***
## V_66         0.544710   0.084217   6.468 1.81e-09 ***
## V_13        -0.499332   0.083867  -5.954 2.26e-08 ***
## V_29        -0.594629   0.085593  -6.947 1.57e-10 ***
## V_170        0.403913   0.083791   4.820 3.90e-06 ***
## V_109       -0.541708   0.084637  -6.400 2.54e-09 ***
## V_98        -0.558359   0.085053  -6.565 1.11e-09 ***
## V_179       -0.390290   0.083857  -4.654 7.86e-06 ***
## V_18        -0.529632   0.085127  -6.222 6.15e-09 ***
## V_206       -0.439153   0.083739  -5.244 6.13e-07 ***
## V_54        -0.453173   0.083883  -5.402 3.00e-07 ***
## V_125       -0.458915   0.084018  -5.462 2.28e-07 ***
## V_142        0.402447   0.083669   4.810 4.08e-06 ***
## V_235        0.429425   0.083804   5.124 1.05e-06 ***
## V_155        0.370097   0.083716   4.421 2.04e-05 ***
## V_34         0.351834   0.083814   4.198 4.94e-05 ***
## V_194        0.273009   0.085159   3.206 0.001692 ** 
## V_183       -0.376601   0.083670  -4.501 1.48e-05 ***
## V_169       -0.344993   0.083766  -4.119 6.71e-05 ***
## V_112        0.278725   0.084640   3.293 0.001274 ** 
## V_39        -0.293362   0.084304  -3.480 0.000682 ***
## V_55        -0.391209   0.083709  -4.673 7.26e-06 ***
## V_69         0.380311   0.083680   4.545 1.24e-05 ***
## V_129       -0.395590   0.083754  -4.723 5.89e-06 ***
## V_173       -0.381593   0.083692  -4.559 1.16e-05 ***
## V_152       -0.131500   0.088366  -1.488 0.139119    
## V_56         0.147425   0.087294   1.689 0.093629 .  
## V_62         0.424582   0.084164   5.045 1.48e-06 ***
## V_104        0.367887   0.083752   4.393 2.29e-05 ***
## V_72        -0.370855   0.083789  -4.426 2.00e-05 ***
## V_88        -0.321197   0.083667  -3.839 0.000191 ***
## V_178        0.381534   0.084033   4.540 1.26e-05 ***
## V_133       -0.435337   0.084932  -5.126 1.04e-06 ***
## V_58         0.361402   0.083871   4.309 3.19e-05 ***
## V_68         0.363490   0.084099   4.322 3.03e-05 ***
## V_172       -0.275813   0.083706  -3.295 0.001266 ** 
## V_176        0.264259   0.083676   3.158 0.001972 ** 
## V_151        0.262899   0.083679   3.142 0.002077 ** 
## V_101       -0.279064   0.083671  -3.335 0.001109 ** 
## V_130        0.354518   0.084439   4.199 4.93e-05 ***
## V_20         0.450383   0.087594   5.142 9.68e-07 ***
## V_16         0.364896   0.084960   4.295 3.38e-05 ***
## V_4          0.448939   0.088214   5.089 1.22e-06 ***
## V_134       -0.322932   0.084261  -3.833 0.000196 ***
## V_45        -0.265568   0.083671  -3.174 0.001874 ** 
## V_120        0.273162   0.083703   3.263 0.001404 ** 
## V_227       -0.273278   0.083759  -3.263 0.001408 ** 
## V_33         0.347907   0.085649   4.062 8.33e-05 ***
## V_148        0.267991   0.083752   3.200 0.001725 ** 
## V_118       -0.283059   0.083948  -3.372 0.000982 ***
## V_138        0.267968   0.083770   3.199 0.001731 ** 
## V_121       -0.253752   0.083678  -3.032 0.002925 ** 
## V_42         0.299744   0.084538   3.546 0.000544 ***
## V_97        -0.272391   0.083932  -3.245 0.001489 ** 
## V_116       -0.260448   0.083813  -3.107 0.002314 ** 
## V_73        -0.209929   0.083934  -2.501 0.013613 *  
## V_85         0.228299   0.083680   2.728 0.007242 ** 
## V_19         0.264901   0.084013   3.153 0.002003 ** 
## V_28        -0.307979   0.086077  -3.578 0.000486 ***
## V_26         0.224630   0.083682   2.684 0.008207 ** 
## V_21        -0.292578   0.085859  -3.408 0.000871 ***
## V_61        -0.242060   0.083830  -2.888 0.004544 ** 
## V_110       -0.242602   0.083878  -2.892 0.004479 ** 
## V_177        0.245034   0.084113   2.913 0.004208 ** 
## V_193       -0.215603   0.083688  -2.576 0.011095 *  
## V_99        -0.244967   0.085178  -2.876 0.004704 ** 
## V_95        -0.219343   0.084007  -2.611 0.010080 *  
## V_187        0.201047   0.083676   2.403 0.017677 *  
## V_115       -0.194921   0.083706  -2.329 0.021409 *  
## V_40        -0.204034   0.083748  -2.436 0.016183 *  
## V_35         0.231018   0.086910   2.658 0.008837 ** 
## V_38         0.223027   0.085936   2.595 0.010530 *  
## V_150       -0.203677   0.084038  -2.424 0.016732 *  
## V_222       -0.198031   0.083780  -2.364 0.019564 *  
## V_168       -0.188724   0.083980  -2.247 0.026296 *  
## V_141        0.183297   0.083777   2.188 0.030449 *  
## V_12         0.180644   0.083706   2.158 0.032745 *  
## V_91        -0.174506   0.084069  -2.076 0.039873 *  
## V_201       -0.168706   0.083691  -2.016 0.045863 *  
## V_67        -0.162550   0.083667  -1.943 0.054182 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08367 on 131 degrees of freedom
## Multiple R-squared:  0.997,  Adjusted R-squared:  0.9945 
## F-statistic: 399.9 on 109 and 131 DF,  p-value: < 2.2e-16

The model with V as a co-predictor has the best fit.

  • Predicting missing values for SVL
tf<-list(V=identity, none=identity)
m <- "V" ; atr <- tf[[m]](fossildataV[is.na(fossildataV[,"SVL"]),m,drop=FALSE])
resultsPEM<-predict(object=PEMfs[[m]],targets=grloc,lmobject=PEMAIC[[m]],newdata=atr,interval="confidence")

# Predicted SVL, with upper and lower limits of the confidence interval for the prediction:
exp(resultsPEM$values); exp(resultsPEM$upper); exp(resultsPEM$lower)
##                              [,1]
## Antarcticoolithus_bradyi 6665.867
##                              [,1]
## Antarcticoolithus_bradyi 7888.606
##                              [,1]
## Antarcticoolithus_bradyi 5632.654

The code to predict missing values for body mass is identical to the one for SVL; simply replace ‘SVL’ with ‘BM’.

  • Plotting the regression for Egg volume ~ SVL with the value of SVL for Antarcticoolithus estimated from PEM
data[1,7]<-6665.867
dataV2<-subset(data, !is.na(V))

ggplot(dataV2, aes(log(SVL), log(V), color=Suborder)) +
  geom_point(size=4) +
  xlab("ln snout-vent length (mm)") +
  ylab("ln egg volume (mm3)") +
  geom_abline(intercept=-0.7990941, slope=1.6708400, colour="lightblue") +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Suborder", palette="Accent")

  • Plotting the regression for Egg volume ~ Body mass with the value of body mass for Antarcticoolithus estimated from PEM
data[1,8]<-456096.7
dataV2<-subset(data, !is.na(V))

ggplot(dataV2, aes(log(BM), log(V), color=Suborder)) +
  geom_point(size=4) +
  xlab("ln body mass (g)") +
  ylab("ln egg volume (mm3)") +
  geom_abline(intercept=5.050556, slope=0.619536, colour="lightblue") +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Suborder", palette="Set1")

Phylogenetic ANOVA

  • Subsetting the data and tree, and compiling relative eggshell thickness (RT) with RT = (eggshell thickness / egg length)
RT<-data$Thickness/data$L; data<-cbind(data, RT)
dataA<-subset(data, !is.na(RT)&!is.na(OV))
dataA$RT<-log(dataA$RT)
anovatree<-drop.tip(tree, setdiff(tree$tip.label, rownames(dataA)))

ANOVA with 2 groups (oviparous and viviparous)

  • Subsetting the dataset and performing the ANOVA
datANOVA<-as.data.frame(cbind(dataA$RT, dataA$OV), header=T)
row.names(datANOVA)<-row.names(dataA); colnames(datANOVA)<-c("RT", "OV")
datANOVA<-ReorderData(anovatree, datANOVA, taxa.names="row names")

phylANOVA(anovatree, datANOVA$OV, datANOVA$RT, p.adj="BH")
## Warning: no labels for x. Assuming order of tree$tip.label.
## 
## Warning: no labels for y. Assuming order of tree$tip.label.
## ANOVA table: Phylogenetic ANOVA
## 
## Response: y
##             Sum Sq  Mean Sq  F value Pr(>F)
## x         2.884385 2.884385 6.074072  0.006
## Residual 31.816186 0.474868                
## 
## P-value based on simulation.
## ---------
## 
## Pairwise posthoc test using method = "BH"
## 
## Pairwise t-values:
##           1        2
## 1  0.000000 2.464563
## 2 -2.464563 0.000000
## 
## Pairwise corrected P-values:
##       1     2
## 1 1.000 0.006
## 2 0.006 1.000
## ---------
  • Boxplot of both groups
ggplot(dataA, aes(x=OV, y=RT, fill=OV)) +
  geom_boxplot() +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_fill_hue()

log(data$RT)[1] # Value for Antarcticoolithus, intermediate between the groups
## [1] 0.9120565
  • Extract percentiles for each boxplot
p2<-ggplot(dataA, aes(x=OV, y=RT, fill=OV)) + geom_boxplot()
ggplot_build(p2)$data
## [[1]]
##      fill       ymin      lower      middle     upper     ymax outliers
## 1 #F8766D -0.1251631  0.9808293  1.35720692 1.7429693 2.784423 3.442019
## 2 #00BFC4 -0.3462762 -0.3308263 -0.02868824 0.8270896 2.503459         
##   notchupper notchlower x flipped_aes PANEL group ymin_final ymax_final  xmin
## 1  1.5065672  1.2078466 1       FALSE     1     1 -0.1251631   3.442019 0.625
## 2  0.8860653 -0.9434418 2       FALSE     1     2 -0.3462762   2.503459 1.625
##    xmax xid newx new_width weight colour size alpha shape linetype
## 1 1.375   1    1      0.75      1 grey20  0.5    NA    19    solid
## 2 2.375   2    2      0.75      1 grey20  0.5    NA    19    solid

ANOVA with 3 groups (oviparous and laid, oviparous and dissected from the oviduct, and viviparous)

  • Subsetting the dataset and performing the ANOVA
datANOVA2<-as.data.frame(cbind(dataA$RT, dataA$Strategy), header=T)
row.names(datANOVA2)<-row.names(dataA); colnames(datANOVA2)<-c("RT", "Strategy")
datANOVA2<-ReorderData(anovatree, datANOVA2, taxa.names="row names")

phylANOVA(anovatree, datANOVA2$Strategy, datANOVA2$RT, p.adj="BH")
## Warning: no labels for x. Assuming order of tree$tip.label.
## 
## Warning: no labels for y. Assuming order of tree$tip.label.
## ANOVA table: Phylogenetic ANOVA
## 
## Response: y
##            Sum Sq  Mean Sq  F value Pr(>F)
## x         7.86154 3.930770 9.666176  0.003
## Residual 26.83903 0.406652                
## 
## P-value based on simulation.
## ---------
## 
## Pairwise posthoc test using method = "BH"
## 
## Pairwise t-values:
##           1         2        3
## 1  0.000000 -1.534310 3.960211
## 2  1.534310  0.000000 4.382854
## 3 -3.960211 -4.382854 0.000000
## 
## Pairwise corrected P-values:
##        1      2      3
## 1 1.0000 0.3420 0.0015
## 2 0.3420 1.0000 0.0015
## 3 0.0015 0.0015 1.0000
## ---------
  • Boxplot of all groups
ggplot(dataA, aes(x=Strategy, y=RT, fill=Strategy)) +
  geom_boxplot() +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_fill_hue()

  • Extract percentiles for each boxplot
p2<-ggplot(dataA, aes(x=Strategy, y=RT, fill=Strategy)) + geom_boxplot()
ggplot_build(p2)$data
## [[1]]
##      fill       ymin      lower     middle       upper      ymax outliers
## 1 #F8766D -0.1251631  0.9794977  1.3139737  1.74296931 2.7844232         
## 2 #00BA38  0.3137979  1.3231783  1.6757606  2.06667501 2.5034588 3.442019
## 3 #619CFF -0.3462762 -0.3359763 -0.3256763 -0.02868824 0.2682998         
##    notchupper notchlower x flipped_aes PANEL group ymin_final ymax_final  xmin
## 1  1.47662916  1.1513182 1       FALSE     1     1 -0.1251631  2.7844232 0.625
## 2  2.02995340  1.3215677 2       FALSE     1     2  0.3137979  3.4420194 1.625
## 3 -0.04536403 -0.6059886 3       FALSE     1     3 -0.3462762  0.2682998 2.625
##    xmax xid newx new_width weight colour size alpha shape linetype
## 1 1.375   1    1      0.75      1 grey20  0.5    NA    19    solid
## 2 2.375   2    2      0.75      1 grey20  0.5    NA    19    solid
## 3 3.375   3    3      0.75      1 grey20  0.5    NA    19    solid

2 – Amniote dataset

This part requires the use of dataset 2 (“Dataset2-amniotes.txt”; see also Supplementary Table 3 in Legendre et al., 2020), and amniote phylogenetic tree (“Amniotetree.nex”).

  • Loading and matching the data and tree
dataS<-read.table("Dataset2-amniotes.txt", header=T)
treeS<-read.nexus("Amniotetree.nex")
treeS<-drop.tip(treeS, setdiff(treeS$tip.label, dataS$Taxon))
dataS<-read.table("Dataset2-amniotes.txt", header=T, row.names="Taxon")
Ws<-diag(vcv.phylo(treeS))

PGLS – Eggshell thickness ~ Egg mass

For hard-shelled eggs

  • Subset data and tree
dataS<-read.table("Dataset2-amniotes.txt", header=T)
datahard<-dataS[-c(50,86:88,93:95,115,118,120:145,147,149),]
treehard<-drop.tip(treeS, setdiff(treeS$tip.label, datahard$Taxon))
dataS<-read.table("Dataset2-amniotes.txt", header=T, row.names="Taxon")
datahard<-dataS[-c(50,86:88,93:95,115,118,120:145,147,149),]
Wh<-diag(vcv.phylo(treehard))
  • Best fit for alpha parameter in OU model
alpha <- seq(0, 1, 0.1)
fit <- list()
form <- log(Eggshell_thickness)~log(Egg_mass)
for (i in seq_along(alpha)) {
  cor <- corMartins(alpha[i], phy = treehard, fixed = T)
  fit[[i]] <- gls(form, correlation = cor, data = datahard, na.action=na.exclude, weights=varFixed(~Wh), method = "ML")
}
sapply(fit, logLik)
##  [1]       Inf -34.53566 -35.81116 -36.82220 -37.35043 -37.58546 -37.68186
##  [8] -37.72003 -37.73494 -37.74073 -37.74298
  • Best fit for g parameter in EB (Early Burst) model
g <- seq(0.1, 1, 0.1)
fit <- list()
form <- log(Eggshell_thickness)~log(Egg_mass)
for (i in seq_along(g)) {
  cor <- corBlomberg(g[i], phy = treehard, fixed = T)
  fit[[i]] <- gls(form, correlation = cor, data = datahard, na.action=na.exclude, weights=varFixed(~Wh), method = "ML")
}
sapply(fit, logLik)
##  [1] -47.38547 -58.17006 -63.69614 -67.03638 -69.29137 -70.92493 -72.16651
##  [8] -73.14384 -73.93423 -74.58743
  • Building PGLS models and running model selection using AICc
BM<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datahard, correlation=corBrownian(phy=treehard), weights=varFixed(~Wh), method="ML")
OU<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datahard, correlation=corMartins(0.1, phy=treehard, fixed=T), weights=varFixed(~Wh), method="ML")
Lambda<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datahard, correlation=corPagel(1, phy=treehard), weights=varFixed(~Wh), method="ML")
EB<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datahard, correlation=corBlomberg(0.1, phy=treehard, fixed=T), weights=varFixed(~Wh), method="ML")
OLS<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datahard, method="ML")

Cand.models = list()
Cand.models[[1]] = BM
Cand.models[[2]] = OU
Cand.models[[3]] = Lambda
Cand.models[[4]] = EB
Cand.models[[5]] = OLS

Modnames = paste(c("BM", "OU", "Lambda", "EB", "OLS"), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = T)
summary(Lambda)
## Generalized least squares fit by maximum likelihood
##   Model: log(Eggshell_thickness) ~ log(Egg_mass) 
##   Data: datahard 
##        AIC      BIC    logLik
##   73.45572 84.32971 -32.72786
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.3282094 
## Variance function:
##  Structure: fixed weights
##  Formula: ~Wh 
## 
## Coefficients:
##                  Value  Std.Error  t-value p-value
## (Intercept)   4.321066 0.12577804 34.35469       0
## log(Egg_mass) 0.439690 0.01721966 25.53422       0
## 
##  Correlation: 
##               (Intr)
## log(Egg_mass) -0.483
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.87276697 -0.89716270 -0.55727271 -0.03185514  2.23701950 
## 
## Residual standard error: 0.02058541 
## Degrees of freedom: 112 total; 110 residual
  • Estimating the pseudo R-squared using caper
dataS<-read.table("Dataset2-amniotes.txt", header=T)
datahard<-dataS[-c(50,86:88,93:95,115,118,120:145,147,149),]
datacomp<-comparative.data(phy=treehard, data=datahard, names.col="Taxon")
pgls<-pgls(log(Eggshell_thickness)~log(Egg_mass), data=datacomp, lambda="ML")
summary(pgls)
## 
## Call:
## pgls(formula = log(Eggshell_thickness) ~ log(Egg_mass), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.06443 -0.01539 -0.00283  0.01159  0.05560 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.328
##    lower bound : 0.000, p = 0.0012278
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.098, 0.597)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   4.334504   0.128404  33.757 < 2.2e-16 ***
## log(Egg_mass) 0.436835   0.017409  25.092 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02103 on 110 degrees of freedom
## Multiple R-squared: 0.8513,  Adjusted R-squared: 0.8499 
## F-statistic: 629.6 on 1 and 110 DF,  p-value: < 2.2e-16

For soft-shelled eggs

  • Subset data and tree
datasoft<-dataS[c(50,86:88,93,94,115,118,120:145,147,149),]
treesoft<-drop.tip(treeS, setdiff(treeS$tip.label, datasoft$Taxon))
dataS<-read.table("Dataset2-amniotes.txt", header=T, row.names="Taxon")
datasoft<-dataS[c(50,86:88,93,94,115,118,120:145,147,149),]
Ws<-diag(vcv.phylo(treesoft))
  • Best fit for alpha parameter in OU model
alpha <- seq(0, 1, 0.1)
fit <- list()
form <- log(Eggshell_thickness)~log(Egg_mass)
for (i in seq_along(alpha)) {
  cor <- corMartins(alpha[i], phy = treesoft, fixed = T)
  fit[[i]] <- gls(form, correlation = cor, data = datasoft, na.action=na.exclude, weights=varFixed(~Ws), method = "ML")
}
sapply(fit, logLik)
##  [1]       Inf -31.89120 -31.76165 -31.77510 -31.78728 -31.79212 -31.79376
##  [8] -31.79428 -31.79444 -31.79448 -31.79450
  • Best fit for g parameter in EB model
g <- seq(0.1, 1, 0.1)
fit <- list()
form <- log(Eggshell_thickness)~log(Egg_mass)
for (i in seq_along(g)) {
  cor <- corBlomberg(g[i], phy = treesoft, fixed = T)
  fit[[i]] <- gls(form, correlation = cor, data = datasoft, na.action=na.exclude, weights=varFixed(~Ws), method = "ML")
}
sapply(fit, logLik)
##  [1] -31.47980 -33.92230 -36.02601 -37.59877 -38.79144 -39.72307 -40.47140
##  [8] -41.08692 -41.60329 -42.04372
  • Building PGLS models and running model selection using AICc
BM<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datasoft, correlation=corBrownian(phy=treesoft), weights=varFixed(~Ws), method="ML")
OU<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datasoft, correlation=corMartins(1, phy=treesoft), weights=varFixed(~Ws), method="ML")
Lambda<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datasoft, correlation=corPagel(0.2, phy=treesoft), weights=varFixed(~Ws), method="ML")
EB<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datasoft, correlation=corBlomberg(0.1, phy=treesoft, fixed=T), weights=varFixed(~Ws), method="ML")
OLS<-gls(log(Eggshell_thickness)~log(Egg_mass), data=datasoft, method="ML")

Cand.models = list()
Cand.models[[1]] = BM
Cand.models[[2]] = OU
Cand.models[[3]] = Lambda
Cand.models[[4]] = EB
Cand.models[[5]] = OLS

Modnames = paste(c("BM", "OU", "Lambda", "EB", "OLS"), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = T)
summary(Lambda)
## Generalized least squares fit by maximum likelihood
##   Model: log(Eggshell_thickness) ~ log(Egg_mass) 
##   Data: datasoft 
##        AIC      BIC    logLik
##   69.89509 76.22916 -30.94754
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.2416992 
## Variance function:
##  Structure: fixed weights
##  Formula: ~Ws 
## 
## Coefficients:
##                   Value  Std.Error  t-value p-value
## (Intercept)   2.1224777 0.21594179 9.828935       0
## log(Egg_mass) 0.3930565 0.05253711 7.481503       0
## 
##  Correlation: 
##               (Intr)
## log(Egg_mass) -0.691
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.2225960 -0.7969044 -0.3082419  0.5311175  1.5319272 
## 
## Residual standard error: 0.03652805 
## Degrees of freedom: 36 total; 34 residual
  • Estimating the pseudo R-squared using caper
dataS<-read.table("Dataset2-amniotes.txt", header=T)
datasoft<-dataS[c(50,86:88,93,94,115,118,120:145,147,149),]
datacomp<-comparative.data(phy=multi2di(treesoft), data=datasoft, names.col="Taxon")
pgls<-pgls(log(Eggshell_thickness)~log(Egg_mass), data=datacomp, lambda="ML")
summary(pgls)
## 
## Call:
## pgls(formula = log(Eggshell_thickness) ~ log(Egg_mass), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.103790 -0.018297 -0.004759  0.035618  0.068005 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.235
##    lower bound : 0.000, p = 0.21201
##    upper bound : 1.000, p = 2.3519e-06
##    95.0% CI   : (NA, 0.751)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   2.113363   0.218395  9.6768 2.687e-11 ***
## log(Egg_mass) 0.390266   0.051037  7.6467 6.899e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03763 on 34 degrees of freedom
## Multiple R-squared: 0.6323,  Adjusted R-squared: 0.6215 
## F-statistic: 58.47 on 1 and 34 DF,  p-value: 6.899e-09

Plot with both regression lines (hard-shelled and soft-shelled)

dataS<-read.table("Dataset2-amniotes.txt", header=T)
ggplot(dataS, aes(log(Egg_mass), log(Eggshell_thickness), colour=Clade)) +
  geom_point(size=4) +
  xlab("ln egg mass (g)") +
  ylab("ln calcareous layer thickness (µm)") +
  geom_abline(intercept=4.321066, slope=0.439690, colour="turquoise", size=1.3) +
  geom_abline(intercept=2.1224777, slope=0.3930565, colour="red", size=1.3) +
  theme(panel.background = element_rect(fill="black")) +
  theme(axis.text=element_text(size=15),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_colour_brewer("Group", palette="Set1")

Ancestral state reconstruction

  • For ratio (Calcareous layer thickness / Egg mass), using fastAnc
dataS<-read.table("Dataset2-amniotes.txt", header=T); dataS<-dataS[-95,]
treeplot<-drop.tip(treeS, setdiff(treeS$tip.label, dataS$Taxon))
dataS<-read.table("Dataset2-amniotes.txt", header=T, row.names="Taxon"); dataS<-dataS[-95,]
dataplot=as.matrix(dataS[,3]); names(dataplot)<-rownames(dataS); colnames(dataplot)<-"CL_thickness"
dataplot<-log10(dataplot)

fit<-fastAnc(treeplot,dataplot,vars=T,CI=T) # Ancestral states for each node
obj<-contMap(treeplot,dataplot,plot=F)

plot(setMap(obj, colors=rev(brewer.pal(10, "Spectral"))), fsize=0.4,lwd=4)

  • Same as above, but with an ultrametric tree, as shown in Fig. 3 of Legendre et al. (2020)
treeplot<-force.ultrametric(treeplot)
fit<-fastAnc(treeplot,dataplot,vars=T,CI=T) # Ancestral states for each node
obj<-contMap(treeplot,dataplot,plot=F)

plot(setMap(obj, colors=rev(brewer.pal(10, "Spectral"))), fsize=0.4,lwd=4)

  • For presence/absence of the prismatic layer, using make.simmap
dataS<-read.table("Dataset2-amniotes.txt", header=T)
dataS<-dataS[-95,]
treeS<-read.nexus("Amniotetree.nex")
treeS<-drop.tip(treeS, setdiff(treeS$tip.label, dataS$Taxon))
prismatic<-dataS[,5]; names(prismatic)<-dataS$Taxon
cols<-setNames(c("royalblue","red3"),levels(prismatic))

mtree<-make.simmap(treeS, prismatic, model="ER")
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##                Absent       Present
## Absent  -0.0003949281  0.0003949281
## Present  0.0003949281 -0.0003949281
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##  Absent Present 
##     0.5     0.5
## Done.
plot(mtree,cols,fsize=0.5,ftype="i")
add.simmap.legend(colors=cols,prompt=FALSE,x=0.9*par()$usr[1],
                  y=0.8*par()$usr[3],fsize=0.8)

# with 1000 simulations of stochastic character maps from the data:
mtrees<-make.simmap(treeS, prismatic, model="ER", nsim=1000)
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##                Absent       Present
## Absent  -0.0003949281  0.0003949281
## Present  0.0003949281 -0.0003949281
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##  Absent Present 
##     0.5     0.5
## Done.
par(mfrow=c(10,10))
null<-sapply(mtrees,plotSimmap,colors=cols,lwd=1,ftype="off")

pd<-summary(mtrees); pd
## 1000 trees with a mapped discrete character with states:
##  Absent, Present 
## 
## trees have 3.763 changes between states on average
## 
## changes are of the following types:
##      Absent,Present Present,Absent
## x->y          1.027          2.736
## 
## mean total time spent in each state is:
##            Absent      Present   total
## raw  2712.0360180 6749.2236767 9461.26
## prop    0.2866464    0.7133536    1.00
par(mfrow=c(1,1))
plot(pd,fsize=0.6,ftype="i",colors=cols,ylim=c(-2,Ntip(treeS)))
add.simmap.legend(colors=cols[2:1],prompt=FALSE,x=0,y=-2,vertical=FALSE)

# density map of the above
objprism<-densityMap(mtrees,states=levels(prismatic)[2:1],plot=FALSE)
## sorry - this might take a while; please be patient
n<-length(objprism$cols)
objprism$cols[1:n]<-colorRampPalette(c("royalblue","red3"))(n)
plot(objprism,size=c(0.6,1),fsize=0.6,lwd=4)

Boxplot for ratio (Calcareous layer thickness / Egg mass)

dataS<-read.table("Dataset2-amniotes.txt", header=T)
RTM<-dataS$Eggshell_thickness/dataS$Egg_mass
dataS<-cbind(dataS, RTM)
dataSboxplot<-dataS %>% filter(!Clade=="Test")

ggplot(dataSboxplot, aes(x=Clade, y=log(RTM), fill=Clade)) + 
  geom_boxplot() +
  theme(panel.background = element_rect(fill="white")) +
  theme(axis.text=element_text(size=10, angle = 90),
        axis.title=element_text(size=15),
        legend.text=element_text(size=15),
        legend.title=element_text(size=17)) +
  scale_fill_brewer(palette="Set1")

  • Extract percentiles for each boxplot
p<-ggplot(dataSboxplot, aes(x=Clade, y=log(RTM), fill=Clade)) + 
  geom_boxplot()
ggplot_build(p)$data
## [[1]]
##      fill       ymin      lower     middle      upper       ymax
## 1 #F8766D -4.1737717 -4.1737717 -4.1737717 -4.1737717 -4.1737717
## 2 #D39200  0.4296871  1.8265428  2.4120341  2.9394662  4.1207778
## 3 #93AA00  0.2636401  1.6216397  2.5162801  2.5794171  2.9038769
## 4 #00BA38  1.4714471  1.4714471  1.4714471  1.4714471  1.4714471
## 5 #00C19F  1.1282254  1.5449861  1.7324607  1.9034789  2.2137481
## 6 #00B9E3  4.4770380  4.4770380  4.4770380  4.4770380  4.4770380
## 7 #619CFF -2.1102132 -0.4793495 -0.1364957  0.9978001  1.9902104
## 8 #DB72FB  0.2876821  0.3729137  0.4581454  0.5433770  0.6286087
## 9 #FF61C3 -1.4307461  0.2392498  1.7701329  3.4510661  5.4647026
##                 outliers notchupper notchlower x flipped_aes         y PANEL
## 1                        -4.1737717 -4.1737717 1       FALSE -4.173772     1
## 2 -0.9350988, -0.6852781  2.6607121  2.1633561 2       FALSE        NA     1
## 3                         3.0207095  2.0118506 3       FALSE        NA     1
## 4                         1.4714471  1.4714471 4       FALSE  1.471447     1
## 5                         1.9327199  1.5322015 5       FALSE        NA     1
## 6                         4.4770380  4.4770380 6       FALSE  4.477038     1
## 7                         0.2372268 -0.5102183 7       FALSE        NA     1
## 8                         0.6485919  0.2676989 8       FALSE        NA     1
## 9                         2.5933530  0.9469128 9       FALSE        NA     1
##   group ymin_final ymax_final  xmin  xmax xid newx new_width weight colour size
## 1     1 -4.1737717 -4.1737717 0.625 1.375   1    1      0.75      1 grey20  0.5
## 2     2 -0.9350988  4.1207778 1.625 2.375   2    2      0.75      1 grey20  0.5
## 3     3  0.2636401  2.9038769 2.625 3.375   3    3      0.75      1 grey20  0.5
## 4     4  1.4714471  1.4714471 3.625 4.375   4    4      0.75      1 grey20  0.5
## 5     5  1.1282254  2.2137481 4.625 5.375   5    5      0.75      1 grey20  0.5
## 6     6  4.4770380  4.4770380 5.625 6.375   6    6      0.75      1 grey20  0.5
## 7     7 -2.1102132  1.9902104 6.625 7.375   7    7      0.75      1 grey20  0.5
## 8     8  0.2876821  0.6286087 7.625 8.375   8    8      0.75      1 grey20  0.5
## 9     9 -1.4307461  5.4647026 8.625 9.375   9    9      0.75      1 grey20  0.5
##   alpha shape linetype
## 1    NA    19    solid
## 2    NA    19    solid
## 3    NA    19    solid
## 4    NA    19    solid
## 5    NA    19    solid
## 6    NA    19    solid
## 7    NA    19    solid
## 8    NA    19    solid
## 9    NA    19    solid